{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyPzSfSBxih9r9ybBz5R2FX/"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Keyword-stuffing detector\n",
        "\n",
        "Counts how often each word occurs in a piece of web content and flags every word whose frequency reaches `SPAM_THRESHOLD` as possible keyword stuffing. The same helpers are applied to two sample texts below."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "e7e7ba04"
      },
      "source": [
        "from collections import Counter\n",
        "import re"
      ],
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Configuration"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "bhp57IB-tHbF"
      },
      "source": [
        "# Minimum number of occurrences at which a word is reported as possible keyword stuffing.\n",
        "SPAM_THRESHOLD = 4\n",
        "\n",
        "# Sample 1: single-line advert copy.\n",
        "web_content_ad = \"\"\"Cheap watches available now! best cheap watches at cheap prices, best cheap watches online, cheap, cheap!!\"\"\"\n",
        "\n",
        "# Sample 2: several sentences spread over two lines.\n",
        "web_content_multiline = \"\"\"Cheap watches available now! Best cheap watches for you.\n",
        "Buy cheap watches online. Cheap cheap cheap watches watches!\"\"\""
      ],
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Helpers\n",
        "\n",
        "The analysis is wrapped in functions so each sample can be checked without copy-pasting cells or overwriting earlier variables."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "EFSJYe0_s-jc"
      },
      "source": [
        "def count_keywords(text: str) -> Counter:\n",
        "    \"\"\"Return a Counter mapping each word in ``text`` to its number of occurrences.\n",
        "\n",
        "    The text is lowercased first, so counting is case-insensitive, and only\n",
        "    runs of word characters are matched, so punctuation is ignored. Keys\n",
        "    appear in first-occurrence order.\n",
        "    \"\"\"\n",
        "    words = re.findall(r'\\b\\w+\\b', text.lower())\n",
        "    return Counter(words)\n",
        "\n",
        "\n",
        "def find_stuffed_keywords(keyword_counts: Counter, threshold: int) -> dict:\n",
        "    \"\"\"Return ``{word: count}`` for every word counted at least ``threshold`` times.\n",
        "\n",
        "    The order of ``keyword_counts`` is preserved in the result.\n",
        "    \"\"\"\n",
        "    return {\n",
        "        word: count\n",
        "        for word, count in keyword_counts.items()\n",
        "        if count >= threshold\n",
        "    }\n",
        "\n",
        "\n",
        "def report_keyword_stuffing(text: str, threshold: int) -> None:\n",
        "    \"\"\"Print all word frequencies in ``text``, then the words at or above ``threshold``.\"\"\"\n",
        "    keyword_counts = count_keywords(text)\n",
        "\n",
        "    print(\"Keyword Frequencies:\")\n",
        "    for word, count in keyword_counts.items():\n",
        "        print(f\"{word}: {count}\")\n",
        "\n",
        "    print(\"\\nPotential Spam Keywords:\")\n",
        "    for word, count in find_stuffed_keywords(keyword_counts, threshold).items():\n",
        "        print(f\"'{word}' appears {count} times (possible keyword stuffing)\")"
      ],
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Sample 1: single-line advert copy"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Bb3wASgJtLG3"
      },
      "source": [
        "report_keyword_stuffing(web_content_ad, SPAM_THRESHOLD)"
      ],
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Keyword Frequencies:\n",
            "cheap: 6\n",
            "watches: 3\n",
            "available: 1\n",
            "now: 1\n",
            "best: 2\n",
            "at: 1\n",
            "prices: 1\n",
            "online: 1\n",
            "\n",
            "Potential Spam Keywords:\n",
            "'cheap' appears 6 times (possible keyword stuffing)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Sample 2: multi-sentence copy"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "k4VK6xBBQl7p"
      },
      "source": [
        "report_keyword_stuffing(web_content_multiline, SPAM_THRESHOLD)"
      ],
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Keyword Frequencies:\n",
            "cheap: 6\n",
            "watches: 5\n",
            "available: 1\n",
            "now: 1\n",
            "best: 1\n",
            "for: 1\n",
            "you: 1\n",
            "buy: 1\n",
            "online: 1\n",
            "\n",
            "Potential Spam Keywords:\n",
            "'cheap' appears 6 times (possible keyword stuffing)\n",
            "'watches' appears 5 times (possible keyword stuffing)\n"
          ]
        }
      ]
    }
  ]
}